# Homemade FWD sampling ---------------------------------------------------
# Forward (ancestral) sampling from a 2-component Gaussian mixture of
# heights: draw a sex label first, then the height given that label.
M   <- 100
# M   <- 1000
out <- rep(NA_real_, M)  # preallocated result vector
for (m in seq_len(M)) {
  # P(Female) = 0.4, P(Male) = 0.6
  lab <- sample(c("Female", "Male"), size = 1, prob = c(0.4, 0.6))
  if (lab == "Female") {
    out[m] <- rnorm(n = 1, mean = 162, sd = sqrt(5))
  } else {
    out[m] <- rnorm(n = 1, mean = 176, sd = sqrt(8))
  }
}
hist(out, main = "", xlab = "")

# Bart’s density ----------------------------------------------------------

########################################################################
# X ~ Bart Simpson's density (a.k.a. "The Claw")
#     f(x) = 0.5*N(x; 0, 1) + sum_{j = 0..4} 0.1*N(x; (j/2) - 1, sd = 0.1)
# 
# Mixture of 6 Normal densities with:
# pi = c(0.5,  0.1,  0.1,  0.1,  0.1,  0.1)
# mu = c(0.0, -1.0, -0.5,  0.0,  0.5,  1.0)
# s  = c(1.0,  0.1,  0.1,  0.1,  0.1,  0.1)
########################################################################

# Some colors, just in case...
?rgb
# Four base RGB triplets, each emitted at alpha 0.7 and then alpha 0.3
# (same eight colors, same order, as listing the rgb() calls one by one).
colos <- local({
  chans <- list(c(32, 74, 135), c(204, 0, 0), c(200, 141, 0), c(78, 153, 6))
  unlist(lapply(c(0.7, 0.3), function(a)
    vapply(chans, function(ch) rgb(ch[1] / 255, ch[2] / 255, ch[3] / 255, a),
           character(1))))
})

# Package and function ----------------------------------------------------
# mixtools provides rnormmix() to sample from a finite Normal mixture.
# library() (not require()) so a missing package fails loudly.
suppressMessages(library(mixtools, quietly = TRUE))

?rnormmix

# Draw n points from Bart's density:
# weights (0.5, 0.1 x 5), means (0, -1, -0.5, 0, 0.5, 1), sds (1, 0.1 x 5)
n <- 250
XX <- rnormmix(n,
               lambda = c(0.5, rep(0.1, 5)),
               mu     = c(0, ((0:4) / 2) - 1),
               sigma  = c(1, rep(0.1, 5)))

# Histogram of the simulated sample on the density scale
hist(XX, prob = TRUE, col = gray(0.8), border = NA, xlab = "x",
     main = "Data from Bart's density",
     sub = paste0("n = ", n),
     breaks = 50)
# Show each observation as a tick mark under the histogram
rug(XX, col = rgb(0, 0, 0, 0.5))

# Plot the true density ---------------------------------------------------
# The "claw": mixture of 6 Normals with weights (0.5, 0.1 x 5),
# means (0, -1, -0.5, 0, 0.5, 1), and sds (1, 0.1 x 5).
# Vectorized over x; returns f(x) for each element of x.
true.den <- function(x) {
  w  <- c(0.5, rep(0.1, 5))
  mu <- c(0, ((0:4) / 2) - 1)
  s  <- c(1, rep(0.1, 5))
  vapply(x, function(xi) sum(w * dnorm(xi, mu, s)), numeric(1))
}
# Overlay the true density on the histogram drawn just above
curve(true.den, col = rgb(1,0,0,0.4), lwd = 3, n = 500, add = TRUE)

# Kernel density estimates: bandwidth drives the bias/variance trade-off
lines(density(XX),            col = colos[3], lwd = 3)   # Oversmoothing (default bw)
lines(density(XX, bw = .08),  col = colos[4], lwd = 3)   # Just Right
lines(density(XX, bw = .008), col = colos[5], lwd = 3)   # Undersmoothing

# Add a legend matching the curve colors above
legend("topright", c("True","Over", "Just right", "Under"), lwd = 5,
       col = c(rgb(1,0,0,0.4), colos[3], colos[4],colos[5]), cex = 0.8, bty = "n")

# Handmade EM4MoG ---------------------------------------------------------

# EM algorithm, handmade, for a 2-component Gaussian mixture.
#
# Arguments:
#   y         : numeric data vector
#   p         : initial mixing proportions, length 2 (should sum to 1)
#   mu        : initial component means, length 2
#   sigma     : initial component standard deviations, length 2
#   n_iter    : number of EM iterations to run (no convergence check)
#   plot_flag : if TRUE, redraw the current fit at each iteration (with a pause)
#
# Returns a list with:
#   parameters : named vector (p1, p2, mu1, mu2, sigma1, sigma2) after n_iter
#   deviance   : -2 * log-likelihood at the final parameters
#   res        : data frame tracing parameters + deviance for every iteration
#
# Note: there is deliberately no safeguard against degenerate solutions — a
# sigma collapsing to 0 makes the likelihood blow up and the next E step
# produce NaN, which is used later to illustrate EM's initialization issues.
handmade.em <- function(y, p, mu, sigma, n_iter, plot_flag = TRUE)
{
  # Init / 2 components only
  cols     <- c(rgb(1,0,0,.3), rgb(0,1,0,.3))
  like     <- p[1]*dnorm(y, mu[1], sigma[1]) + p[2]*dnorm(y, mu[2], sigma[2])
  deviance <- -2*sum(log(like))
  res      <- matrix(NA, n_iter + 1, 8)
  res[1,]  <- c(0, p, mu, sigma, deviance)
  for (iter in seq_len(n_iter)) {
    # E step: posterior responsibility of component 1 for each point
    d1 <- p[1]*dnorm(y, mu[1], sigma[1])
    d2 <- p[2]*dnorm(y, mu[2], sigma[2])
    r1 <- d1/(d1 + d2)
    r2 <- 1 - r1

    # M step: responsibility-weighted MLEs
    p[1]     <- mean(r1)
    mu[1]    <- sum(r1*y)/sum(r1)
    sigma[1] <- sqrt(sum(r1*(y^2))/sum(r1) - (mu[1])^2)
    p[2]     <- 1 - p[1]
    mu[2]    <- sum(r2*y)/sum(r2)
    sigma[2] <- sqrt(sum(r2*(y^2))/sum(r2) - (mu[2])^2)

    # -2 x log-likelihood (a.k.a. deviance) at the updated parameters
    like     <- p[1]*dnorm(y, mu[1], sigma[1]) + p[2]*dnorm(y, mu[2], sigma[2])
    deviance <- -2*sum(log(like))

    # Save the full state for this iteration
    res[iter+1,] <- c(iter, p, mu, sigma, deviance)

    # Plot: data colored by most-likely component + current mixture curve
    if (plot_flag){
      hist(y, prob = TRUE, breaks = 30, col = gray(.8), border = NA,
           main = "", xlab = paste("EM Iteration: ", iter, "/", n_iter, sep = ""))
      # Fixed seed keeps the jitter identical between frames, but we save and
      # restore .Random.seed so we don't clobber the caller's RNG stream
      # (the old bare set.seed(123) silently reset the global random sequence).
      rng_state <- if (exists(".Random.seed", envir = globalenv()))
        get(".Random.seed", envir = globalenv()) else NULL
      set.seed(123)
      points(jitter(y), rep(0, length(y)),
             pch = 19, cex = .6,
             col = cols[(dnorm(y, mu[1], sigma[1]) > dnorm(y, mu[2], sigma[2])) + 1])
      if (!is.null(rng_state))
        assign(".Random.seed", rng_state, envir = globalenv())
      curve(p[1]*dnorm(x, mu[1], sigma[1]) + p[2]*dnorm(x, mu[2], sigma[2]),
            lwd = 4, col = rgb(0,0,0,.5), add = TRUE)
      Sys.sleep(1.5)
    }
  }
  res <- data.frame(res)
  names(res) <- c("iteration","p1","p2","mu1","mu2","sigma1","sigma2","deviance")
  out <- list(parameters = c(p = p, mu = mu, sigma = sigma),
              deviance = deviance, res = res)
  return(out)
}

# Old Faithful: waiting times between eruptions (clearly bimodal)
data("faithful")
?faithful
hem_fit <- handmade.em(
  faithful$waiting,
  p      = c(0.5, 0.5),
  mu     = c(45, 55),
  sigma  = c(8, 8),
  n_iter = 20
)

round(hem_fit$parameters, 3)
##     p1     p2    mu1    mu2 sigma1 sigma2 
##  0.360  0.640 54.596 80.079  5.855  5.880
hem_fit$deviance
## [1] 2068.005
# Handmade EM: Different Initial Values -----------------------------------

library(ggplot2)
library(gridExtra)

# Plot, for each parameter column of df, its trajectory across EM iterations,
# one line per replicate, arranged in a 3-column grid.
#
# df layout: iteration in column 1, parameter columns in 2:(ncol(df)-1),
# replicate id in the last column ("rep").
plotConvMC <- function(df, title = NULL)
{
  df$rep <- as.factor(df$rep)
  graf   <- vector("list", ncol(df) - 2)
  for (j in (2:(ncol(df) - 1))) {
    # NOTE(review): aes_string() is soft-deprecated in recent ggplot2;
    # consider aes(.data[[...]]) if this starts warning.
    grafj <- ggplot(df) + geom_line(aes_string(df[,1], df[,j],
                                               color = df[,ncol(df)])) +
      xlab("iteration") + ylab(names(df[j])) + theme(legend.position = "none")
    graf[[j-1]] <- grafj
  }
  do.call("grid.arrange", c(graf, ncol = 3, top = title))
}


# Run EM from 10 random starting configurations and stack the traces.
set.seed(1234)
runs <- vector("list", 10)  # preallocate instead of growing D.em with rbind()
for (m in seq_len(10)) {
  p1 <- runif(1, 0.1, 0.9)
  df.em <- handmade.em(faithful$waiting,
                       p  = c(p1, 1 - p1),
                       mu = rnorm(2, 70, 15), sigma = rlnorm(2, 2, 0.7),
                       n_iter = 50, plot_flag = FALSE)$res
  df.em$rep <- m
  runs[[m]] <- df.em
}
D.em <- do.call(rbind, runs)
plotConvMC(D.em)

# Up to some permutation (label switching), all runs converge to the same solution.
# A very poor initial guess may lead to a very poor convergence of EM.

handmade.em(faithful$waiting, p = c(0.5, 0.5), mu = c(30, 40), sigma = c(2, 10), n_iter = 5)

## $parameters
##     p1     p2    mu1    mu2 sigma1 sigma2 
##    NaN    NaN    NaN    NaN    NaN    NaN 
## 
## $deviance
## [1] NaN
## 
## $res
##   iteration           p1        p2      mu1      mu2       sigma1   sigma2
## 1         0 5.000000e-01 0.5000000 30.00000 40.00000 2.000000e+00 10.00000
## 2         1 1.290643e-11 1.0000000 43.00624 70.89706 1.130252e-01 13.56996
## 3         2 4.706595e-11 1.0000000 43.00000 70.89706 8.259062e-07 13.56996
## 4         3 2.337458e-05 0.9999766 43.00000 70.89771 8.259062e-07 13.56945
## 5         4 3.675314e-03 0.9963247 43.00000 70.99997 0.000000e+00 13.48858
## 6         5          NaN       NaN      NaN      NaN          NaN      NaN
##   deviance
## 1 5227.041
## 2 2190.578
## 3 2190.565
## 4 2174.461
## 5     -Inf
## 6      NaN
# In this example, one of the variances converges essentially to 0...

# mixtools ----------------------------------------------------------------

# Fit a 2-component Gaussian mixture by EM with mixtools (k = 2 is the default)
?normalmixEM
mt_fit = normalmixEM(faithful$waiting)
## number of iterations= 24
summary(mt_fit)
## summary of normalmixEM object:
##           comp 1    comp 2
## lambda  0.360887  0.639113
## mu     54.614891 80.091091
## sigma   5.871244  5.867717
## loglik at estimate:  -1034.002
names(mt_fit)
## [1] "x"          "lambda"     "mu"         "sigma"      "loglik"    
## [6] "posterior"  "all.loglik" "restarts"   "ft"
plot(mt_fit)

# mt_fit is an S3 object of class "mixEM"; plot() dispatches to plot.mixEM
class(mt_fit)
## [1] "mixEM"
?plot.mixEM
# density = TRUE also draws the fitted mixture density over the data
plot(mt_fit, density = TRUE, w = 1.1)

# Classes: MAP labels — component 2 wherever P(component 1 | x) < 0.5
(mt_fit$posterior[,1] < 0.5) + 1
##   [1] 2 1 2 1 2 1 2 2 1 2 1 2 2 1 2 1 1 2 1 2 1 1 2 2 2 2 1 2 2 2 2 2 1 2 2 1 1
##  [38] 2 1 2 2 1 2 1 2 2 1 1 2 1 2 2 1 2 1 2 2 1 2 2 1 2 1 2 1 2 2 2 1 2 2 1 2 2
##  [75] 1 2 1 2 2 2 2 2 2 1 2 2 2 2 1 2 1 2 1 2 1 2 2 2 1 2 1 2 1 2 2 1 2 1 2 2 2
## [112] 1 2 2 1 2 1 2 1 2 1 2 2 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2 2 2 1 2 1
## [149] 2 1 2 2 1 2 2 2 2 2 1 2 1 2 1 2 1 2 1 2 1 2 1 1 2 2 2 2 2 1 2 2 1 2 2 2 1
## [186] 2 2 1 2 1 2 1 2 2 2 2 2 2 1 2 1 2 2 1 2 1 2 2 1 2 2 2 1 2 1 2 1 2 1 2 1 2
## [223] 1 2 2 2 2 2 2 2 2 1 2 1 2 1 1 2 2 1 2 1 2 1 2 2 1 2 2 2 1 2 2 2 2 2 2 2 1
## [260] 2 2 2 1 2 1 1 2 2 1 2 1 2
# To be compared with its "extreme" version: k-means (with 2 centers)
?kmeans
km_fit <- kmeans( faithful$waiting, centers = c(45, 55) )
names(km_fit)
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Compute the proportion, center and standard deviation for each cluster,
# so they line up with the mixture's (p, mu, sigma) estimates
list(p     = km_fit$size/sum(km_fit$size), 
     mu    = as.vector(km_fit$centers), 
     sigma = sqrt(km_fit$withinss/km_fit$size)
     )
## $p
## [1] 0.3676471 0.6323529
## 
## $mu
## [1] 54.75000 80.28488
## 
## $sigma
## [1] 5.865791 5.610953
# ...compare: the EM mixture estimates are very close to the k-means summary
list(p = mt_fit$lambda, mu = mt_fit$mu, sigma = mt_fit$sigma)
## $p
## [1] 0.3608869 0.6391131
## 
## $mu
## [1] 54.61489 80.09109
## 
## $sigma
## [1] 5.871244 5.867717
# Model Based :: MoG ------------------------------------------------------

## Geyser Data ---
# MASS::geyser: eruption duration vs waiting time for Old Faithful.
# Visualize the joint density via a 2-D kernel estimate on a 50x50 grid,
# rendered as an interactive plotly surface.
suppressMessages(require(plotly, quietly = T))
?MASS::geyser
dd <- MASS::geyser
kd <- with(MASS::geyser, MASS::kde2d(duration, waiting, n = 50))
p <- plot_ly(x = kd$x, y = kd$y, z = kd$z) %>% add_surface()
p
# Mixture of Gaussian: Mclust selects both the covariance structure and the
# number of components by BIC (here: VVI with 4 components)
suppressMessages(require(mclust, quietly = T))
?mclust

mod1 = Mclust(dd)
mod1
## 'Mclust' model object: (VVI,4) 
## 
## Available components: 
##  [1] "call"           "data"           "modelName"      "n"             
##  [5] "d"              "G"              "BIC"            "loglik"        
##  [9] "df"             "bic"            "icl"            "hypvol"        
## [13] "parameters"     "z"              "classification" "uncertainty"
summary(mod1)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust VVI (diagonal, varying volume and shape) model with 4 components: 
## 
##  log-likelihood   n df       BIC       ICL
##        -1330.13 299 19 -2768.568 -2798.746
## 
## Clustering table:
##  1  2  3  4 
## 90 17 98 94
# MAP cluster label (1-4) for each of the 299 observations
mod1$classification
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
##   1   4   3   1   1   4   3   1   4   3   4   3   4   3   1   4   3   4   3   1 
##  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40 
##   4   3   4   3   4   3   2   1   1   1   1   1   4   3   2   3   4   3   4   3 
##  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60 
##   4   3   4   3   4   3   4   3   4   3   2   3   4   3   4   3   1   2   1   1 
##  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80 
##   4   3   4   3   4   3   4   3   1   4   3   4   3   1   1   4   3   1   1   1 
##  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 
##   1   4   3   2   1   4   3   4   3   4   3   4   3   4   3   4   3   4   3   4 
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 
##   3   4   3   4   3   4   3   4   3   2   3   1   4   3   4   3   4   3   4   3 
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 
##   4   3   1   1   4   3   1   1   1   1   1   1   2   3   1   1   1   1   4   3 
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 
##   1   1   1   1   1   1   4   3   2   3   4   3   4   3   4   3   4   1   1   1 
## 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 
##   1   1   1   4   3   4   3   4   3   2   3   2   1   4   3   2   3   4   3   1 
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 
##   4   3   4   3   1   1   1   4   3   4   3   4   3   4   3   2   1   4   3   4 
## 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 
##   3   4   3   1   4   1   1   2   1   1   1   4   3   4   3   4   3   2   1   1 
## 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 
##   4   3   1   1   1   1   1   1   4   3   4   3   4   3   1   1   1   4   3   1 
## 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 
##   4   3   2   1   4   3   2   4   3   4   3   1   1   4   3   4   3   1   1   1 
## 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 
##   1   4   3   1   1   4   3   4   3   2   3   1   4   3   4   3   1   1   1   1 
## 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 
##   1   1   1   4   1   4   3   4   3   4   3   4   3   4   3   4   3   1   4
# Default plot
class(mod1)
## [1] "Mclust"
?plot.Mclust

# 2x2 panel: uncertainty, classification, fitted density, BIC curves
par(mfrow = c(2,2))
plot(mod1, what = "uncertainty")
plot(mod1, what = "classification")
plot(mod1, what = "density")
plot(mod1, what = "BIC")

# Restore the single-panel layout
par(mfrow = c(1,1))

## Circle ---

# Noiseless: n points uniformly distributed on a circle of radius 2
set.seed(123)
n      <- 500
radius <- 2
angle  <- runif(n)*pi*2
x      <- cos(angle)*radius
y      <- sin(angle)*radius

# Jitter around: isotropic Gaussian noise (sd = 0.15) on both coordinates
SD <- 0.15
xn <- x + rnorm(n, sd = SD)
yn <- y + rnorm(n, sd = SD)
XX <- cbind(xn, yn)

# True circle in black, noisy observations in red
plot(x, y, pch = 19, col = "black", asp = 1, cex = 0.5)
points(xn, yn, pch = 19, col = "red", cex = 0.5)

# Fit a Gaussian mixture to the noisy circle: Mclust has no notion of the
# circular structure, so BIC selects 8 ellipsoidal components tiling the ring
mod2 = Mclust(cbind(xn,yn))
mod2
## 'Mclust' model object: (EEV,8) 
## 
## Available components: 
##  [1] "call"           "data"           "modelName"      "n"             
##  [5] "d"              "G"              "BIC"            "loglik"        
##  [9] "df"             "bic"            "icl"            "hypvol"        
## [13] "parameters"     "z"              "classification" "uncertainty"
summary(mod2)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust EEV (ellipsoidal, equal volume and shape) model with 8 components: 
## 
##  log-likelihood   n df       BIC       ICL
##        -1013.33 500 33 -2231.742 -2373.283
## 
## Clustering table:
##  1  2  3  4  5  6  7  8 
## 76 45 76 58 45 69 66 65
# 2x2 panel: uncertainty, classification, fitted density, BIC curves
par(mfrow = c(2,2))
plot(mod2, what = "uncertainty")
plot(mod2, what = "classification")
plot(mod2, what = "density")
plot(mod2, what = "BIC")

# Restore the single-panel layout
par(mfrow = c(1,1))

# kmeans++ ----------------------------------------------------------------

# k-means++ is a routine to suggest center points before the classical k-means 
# is called. The following lines of code will do that, where X is a matrix of 
# data points, as requested for kmeans, and k the number of centers:

suppressWarnings(require(pracma, quietly = TRUE))
?distmat
?kmeans

# k-means++ seeding followed by classical k-means.
#
# X : numeric matrix of data points (rows = observations), as for kmeans()
# k : number of centers (k >= 1)
#
# The first center is a uniformly random data point; each subsequent center
# is a data point sampled with probability proportional to its distance from
# the nearest already-chosen center (pracma::distmat, Euclidean).
kmpp <- function(X, k) {
  n <- nrow(X)
  C <- numeric(k)
  C[1] <- sample(1:n, 1)

  # seq_len(k)[-1] is empty when k == 1, unlike 2:k which would loop c(2, 1)
  for (i in seq_len(k)[-1]) {
    # Distances to the centers picked so far; drop = FALSE keeps a 1-row
    # matrix when only one center has been chosen (X[C, ] would drop it
    # to a plain vector).
    dm <- distmat(X, X[C[seq_len(i - 1)], , drop = FALSE])
    pr <- apply(dm, 1, min)
    pr[C] <- 0  # already-chosen points can't be reselected (0 indices ignored)
    C[i] <- sample(1:n, 1, prob = pr)
  }
  kmeans(X, X[C, ])
}

# Here distmat(a, b) should return the distances between the rows of two
# matrices a and b. There may be several implementations in R; one is
# distmat() in package pracma.
#
# Please note that AFAIK it is not clear whether the approach of kmeans++ is 
# really better than, e.g., kmeans with several restarts.

# suppressMessages(require(ggfortify, quietly = T))
# ?autoplot
# suppressMessages(require(useful, quietly = T))
# ?plot.kmeans

suppressMessages(require(factoextra, quietly = T))
library(help = factoextra)

# Plot
?fviz_cluster
  
# k-means++ with 3 and then 4 centers on the geyser data,
# visualized with normal-ellipse cluster plots
ddmat <- as.matrix(dd)
km3 <- kmpp(ddmat, k = 3)
fviz_cluster(km3, data = ddmat, 
             geom = "point",
             ellipse.type = "norm",
             palette = "Dark2",
             ggtheme = theme_minimal())

km4 <- kmpp(ddmat, k = 4)
fviz_cluster(km4, data = ddmat, 
             geom = "point",
             ellipse.type = "norm",
             palette = "Dark2",
             ggtheme = theme_minimal())

# Selecting k...
?fviz_nbclust

# Elbow method: total within-cluster sum of squares vs k
fviz_nbclust(ddmat, kmeans, method = "wss")

# Average silhouette width vs k
fviz_nbclust(ddmat, kmeans, method = "silhouette")

# Gap statistic vs k
fviz_nbclust(ddmat, kmeans, method = "gap_stat")

# k = 2, 3, or 4 seem promising.